home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Power Programmierung
/
Power-Programmierung (Tewi)(1994).iso
/
assemblr
/
library
/
edit
/
fremacsr
/
search.asm
< prev
next >
Wrap
Assembly Source File
|
1987-03-17
|
18KB
|
853 lines
;History:697,24,17
;Search support: compiles a source pattern (inpat) into a tagged pattern
; (outpat) and searches the text with either a fast literal scan or a slow
; pattern-driven match.
;NOTE(review): LINENEW, CR and LF are used below but not defined in this
; file - presumably they come from memory.def; confirm.
include memory.def
data segment byte public
NULL equ 0 ;terminates the source pattern held in inpat.
;b_struc/w_struc exist only so that ".b"/".w" can be appended to an untyped
; memory operand to give it a size (e.g. es:[si].w).
b_struc struc
b db ?
b_struc ends
w_struc struc
w dw ?
w_struc ends
;The characters below are the special characters of the source pattern and
; also the tags that introduce each element of the compiled pattern.
CHR equ "C" ;tag: literal character; the character follows the tag.
BOL equ "<" ;beginning of line.
EOL equ ">" ;end of line.
ANY equ "?" ;any single character other than a newline.
CCL equ "[" ;character class; a count byte and the members follow the tag.
ECCL equ "]" ;closes a character class in the source pattern.
NCCL equ "~" ;negated character class; same layout as CCL.
EOS equ "." ;tag: end of the compiled pattern.
CLOSURE equ "*" ;tag: zero or more of the element that follows.
CLOSIZE equ 1 ;number of bytes a CLOSURE tag occupies.
extrn outpat: byte ;the compiled pattern.
extrn OUTPATSIZE: abs ;size of outpat.
extrn inpat: byte ;the source pattern, later the literal string to find.
extrn INPATSIZE: abs ;size of inpat.
inpatlen dw ? ;length of the literal string in inpat (fast search only).
direction dw ? ;0 = search forwards, 1 = search backwards (set by search).
slow_search db ? ;=1 if we need to process magic chars.
extrn textseg: word ;segment of the text; loaded into es (and ds) by search.
data ends
bufseg segment public
;The comments in this file call these "bottom of top" and "top of bottom":
; a position equal to topbot is continued at bottop when moving forwards,
; and the reverse when moving backwards.
extrn topbot: word
extrn bottop: word
bufseg ends
code segment byte public
assume cs:code, ds:data
public slowly, forward, backward
extrn get_mark: near, set_mark_si: near
public search
search:
;enter with ch=start mark, cl=end mark, dh=first mark, dl=last mark.
;start searching at mark ch. If the string is found, then return the
; beginning in mark dh, and the end in mark dl, and cy=0. If the string
; wasn't found, return cy=1.
push dx ;save the first, last marks.
push es
mov es,textseg
assume es:bufseg
push ds ;save ds
push es
pop ds
assume ds:bufseg ;for get_mark
mov al,cl ;get the end mark.
push cx
call get_mark
mov di,si ;save a copy of the end.
pop cx
mov al,ch ;get the start mark.
call get_mark
pop ds ;restore ds
assume ds:data
mov direction,0
cmp si,di ;start>=end?
jb search_4 ;no. (doesn't matter if they're equal)
mov direction,1 ;yes, go in reverse direction.
search_4:
cmp slow_search,0 ;must we do a slow search?
jne search_2 ;yes.
cmp direction,0 ;search in proper direction.
jne search_5
call forward
jmp search_3
search_5:
call backward
jmp search_3
search_2:
call slowly
search_3:
pop es
assume es:data
pop dx
jc search_1 ;not found.
push ds
mov ds,textseg ;for set_mark_si
assume ds:bufseg
push bx ;preserve pointer to end.
mov al,dh
call set_mark_si ;set the first mark.
pop si ;pushed as bx.
mov al,dl
call set_mark_si ;set the last mark.
pop ds
assume ds:data
clc ;return a match.
ret
search_1:
stc ;return no match.
ret
public regexp_pat
regexp_pat:
;enter with si, cx->pattern.
;exit with cy=1 if error.
cmp cx,INPATSIZE ;too big?
jae regexp_5 ;yes - return cy.
mov di,offset inpat
rep movsb
mov al,NULL ;store the terminating null.
stosb
call makepat
jc regexp_4 ;if any error, quit now.
;If outpat has only CHRs, we can use the fast search. Fix up inpat so it
; has the real string. We must do this because of the '\' escape char.
mov si,offset outpat
mov di,offset inpat
mov bp,di ;make a copy of di.
regexp_1:
lodsb
cmp al,EOS ;if we got to EOS, all is ok.
je regexp_2
cmp al,CHR ;CHR?
jne regexp_3 ;no - can't use fast search.
movsb ;move the char to inpat.
jmp regexp_1
regexp_2:
sub di,bp ;subtract the base of the string.
mov inpatlen,di ;save the real length.
mov slow_search,0
clc
jmp short regexp_4
regexp_3:
mov slow_search,1
clc
regexp_4:
ret
regexp_5:
stc
ret
public literal_pattern
literal_pattern:
;enter with si, cx->pattern.
;exit with cy=1 if error.
cmp cx,INPATSIZE ;too big?
jae literal_3 ;yes - return cy.
push si
push cx
mov inpatlen,cx
mov di,offset inpat
rep movsb
pop cx
pop si
mov di,offset outpat
jcxz literal_1
literal_2:
mov ax,offset outpat-2
add ax,OUTPATSIZE
cmp di,ax ;do we have enough room?
jae literal_3 ;no - quit now.
mov al,CHR
stosb
movsb
loop literal_2
literal_1:
mov al,EOS ;store the end of string.
stosb
mov slow_search,0 ;we always use a fast search.
clc
ret
literal_3:
stc
ret
assume ds:data, es:bufseg
;slowly - try the compiled pattern (outpat) at one position after another,
; using amatch, until it matches or si reaches di. The variable "direction"
; picks the stepping direction: 0 steps forwards, anything else backwards.
; Positions in the middle of a newline (its second byte) are stepped over.
slowly:
;es:si -> first char to look at.
;es:di -> after last char to look at (the position equal to di is not tried).
;return cy if no match, else nc, si->start of match, bx->after end of match.
slowly_0:
cmp si,topbot ;at topbot already?
jne slowly_1
mov si,bottop ;yes - continue at the top of the bottom.
slowly_1:
cmp si,di ;at the end yet?
stc ;assume not found. (stc leaves the zero flag from the cmp alone.)
je slowly_3 ;yes - not found.
push di
mov di,offset outpat ;start at beginning of pattern.
call amatch
pop di ;restore current, end.
jnc slowly_3 ;we found a match
cmp direction,0 ;forwards or backwards?
jne slowly_2 ;backwards.
inc si
cmp si,topbot ;at bottom of top?
jne slowly_5 ;no - go see whether we stepped into a newline.
mov si,bottop ;yes - load top of bottom.
jmp slowly_1
slowly_5:
cmp es:[si-1].w,LINENEW ;at newline?
jne slowly_1 ;no.
inc si ;yes - skip LF part of newline.
jmp slowly_0
slowly_2:
cmp si,bottop ;at top of bottom?
jne slowly_4 ;no.
mov si,topbot ;yes - load bottom of top.
slowly_4:
dec si ;back up to previous character.
cmp si,bottop ;at top of bottom now?
je slowly_1 ;yes - can't possibly be split over newline.
cmp es:[si-1].w,LINENEW ;at newline?
jne slowly_1 ;no.
dec si ;yes - skip to beginning of newline.
jmp slowly_1
slowly_3:
ret
;forward - fast search for the literal string inpat (inpatlen characters),
; scanning from low addresses to high ones. The first character is found
; with scasb and the rest is confirmed with cmpsb.
;When the range has its start in the top and its end in the bottom it is
; searched in three passes: the top (recursively), the starts whose match
; would run from the top into the bottom (with slowly), and the bottom
; (recursively).
;NOTE(review): the slowly pass steps according to "direction", so it only
; moves forwards if direction is 0 when forward is called - search arranges
; that; confirm for any other caller of this public routine.
forward:
;es:si -> first char to look at.
;es:di -> after last char to look at.
;return cy if no match, else nc, si->start of match, bx->after end of match.
cmp si,topbot ;starting at the bottom of the top?
jne forward_8 ;no.
mov si,bottop ;yes - that is the same place as the top of the bottom.
forward_8:
cmp di,bottop ;finishing at the top of the bottom?
jne forward_9 ;no.
mov di,topbot ;yes - that is the same place as the bottom of the top.
forward_9:
cmp si,bottop ;is start in bottom?
jae forward_1 ;yes - no need to split.
cmp di,topbot ;is finish in top?
jbe forward_1 ;yes - no need to split.
push di
mov di,topbot
call forward ;recursively search top
mov ax,di ;ax = di as the recursive call left it.
pop di ;(mov and pop leave the carry from the call alone.)
jnc forward_2 ;we found it - exit.
mov si,ax ;start where forward left off.
push di
mov di,bottop ;and end where it will begin again.
call slowly
pop di
jnc forward_2 ;they found it - exit.
mov si,bottop ;no need to save the old si.
call forward
jmp short forward_2 ;in any case, exit.
forward_1:
mov cx,di ;count the number of chars to look at.
sub cx,si
mov di,si ;prepare for scasb.
mov bx,inpatlen
dec bx ;bx = string length - 1.
sub cx,bx ;this many fewer chars to look at.
jb forward_5 ;string is shorter than search.
forward_3:
jcxz forward_5 ;no chars to look at.
mov si,offset inpat
lodsb ;get the first char.
forward_4:
scasb ;look for the first char.
loopnz forward_4 ;keep looking until we find it.
jnz forward_5 ;we didn't
xchg cx,bx ;set the count to the string length.
push cx ;save the string length
push di ;save the source position
repe cmpsb ;is this it? (if cx=0 the flags are still those of the scasb.)
mov cx,bx ;restore the search length
pop di ;restore the source position
pop bx ;restore the string length
jne forward_3 ;no match - try at next position.
cmp inpat,LF ;are we searching for an LF first string?
jne forward_6 ;no - don't worry.
cmp byte ptr es:[di-2],CR ;did we just match the LF part of a newline?
je forward_3 ;yes - no match.
forward_6:
cmp inpat[bx],CR ;are we searching for a CR last string?
jne forward_7 ;no - don't worry.
cmp byte ptr es:[di+bx],LF ;did we just match the CR part of a newline?
je forward_3 ;yes - no match.
forward_7:
mov si,di ;get the source position
add bx,si ;add it to the count to get the end.
dec si ;make it point to the first char again.
clc
jmp short forward_2
forward_5:
stc
forward_2:
ret
backward:
;backward - fast search for the literal string inpat (inpatlen characters),
; scanning from high addresses to low ones.
;es:si -> where the search starts; the highest position tried is one string
; length below it.
;es:di -> where the search stops; the lowest position a match may start at.
;return cy if no match, else nc, si->start of match, bx->after end of match.
;
;When the start is in the bottom and the stop is in the top, the range is
; searched in three passes: the bottom (recursively), then the starts just
; below topbot whose match would run into the bottom (tried one by one with
; amatch on outpat, which describes the same string), then the top
; (recursively).
;The previous version re-entered itself with di=bottop, had that turned back
; into topbot by the entry code, and so chose the split again without end.
; The range is now classified by where the start lies.
;NOTE(review): like the straddling pass in forward, the amatch pass does not
; check that the match ends at or below the starting position, nor the
; CR-before-LF rule applied by the fast loop.
	cmp	si,bottop		;starting at the top of the bottom?
	jne	backward_8
	mov	si,topbot		;yes - same place as the bottom of the top.
backward_8:
	cmp	si,topbot		;is the start in the top?
	ja	backward_9		;no - it is in the bottom.
	cmp	di,bottop		;yes. was the stop given as top of bottom?
	jne	backward_1
	mov	di,topbot		;yes - same place as the bottom of the top.
	jmp	short backward_1
backward_9:
	cmp	di,topbot		;start in bottom. stop at bottom of top?
	jne	backward_10
	mov	di,bottop		;yes - same place as the top of the bottom.
backward_10:
	cmp	di,bottop		;is the stop in the bottom too?
	jae	backward_1		;yes - no need to split.
	jmp	backward_11		;no - the range spans both parts.
backward_1:
	mov	cx,si			;count the number of chars to look at.
	sub	cx,di
	mov	di,si			;prepare for scasb.
	mov	bx,inpatlen
	dec	bx			;bx = string length - 1.
	sub	cx,bx			;this many fewer chars to look at.
	jb	backward_5		;string is shorter than search.
	sub	di,bx			;back up that many chars.
	add	di,2-1			;pre-increment for loop and size.
backward_3:
	sub	di,2			;restore next char.
	jcxz	backward_5		;no chars to look at.
	mov	si,offset inpat
	lodsb				;get the first char.
	std				;now scan backwards.
backward_4:
	scasb				;look for the first char.
	loopnz	backward_4		;keep looking until we find it.
	cld				;now compare, etc. forwards.
	jnz	backward_5		;we didn't find it.
	add	di,2			;go forwards to the next char.
	xor	al,al			;set zr in case string length-1=0.
	xchg	cx,bx			;set the count to the string length.
	push	cx			;save the string length
	push	di			;save the source position
	repe	cmpsb			;is this it?
	mov	cx,bx			;restore the search length
	pop	di			;restore the source position
	pop	bx			;restore the string length
	jne	backward_3		;no match - try at next position.
	cmp	inpat,LF		;are we searching for an LF first string?
	jne	backward_6		;no - don't worry.
	cmp	byte ptr es:[di-2],CR	;did we just match the LF part of a newline?
	je	backward_3		;yes - no match.
backward_6:
	cmp	inpat[bx],CR		;are we searching for a CR last string?
	jne	backward_7		;no - don't worry.
	cmp	byte ptr es:[di+bx],LF	;did we just match the CR part of a newline?
	je	backward_3		;yes - no match.
backward_7:
	mov	si,di			;get the source position
	add	bx,si			;add it to the count to get the end.
	dec	si			;make it point to the first char again.
	clc
	jmp	short backward_2
backward_5:
	stc
backward_2:
	ret
backward_11:
;the start is in the bottom and the stop is in the top.
	push	di
	mov	di,bottop
	call	backward		;first the part of the range in the bottom.
	pop	di			;(pop leaves the carry from the call alone.)
	jnc	backward_15		;found it - exit.
	mov	cx,inpatlen
	dec	cx			;at most length-1 starts can straddle.
	jle	backward_14		;none for strings of 0 or 1 characters.
	mov	si,topbot
backward_12:
	cmp	si,di			;run out of text in the top?
	je	backward_14		;yes.
	dec	si			;back up to the previous character.
	cmp	es:[si-1].w,LINENEW	;on the LF part of a newline?
	je	backward_13		;yes - a match can't start here.
	push	cx
	push	di
	mov	di,offset outpat
	call	amatch			;nc, bx->end if the string is at es:si.
	pop	di
	pop	cx
	jnc	backward_15		;found it - si and bx are already set.
backward_13:
	loop	backward_12
backward_14:
	mov	si,topbot
	call	backward		;finally the part of the range in the top.
backward_15:
	ret
;amatch - anchored match: does the compiled pattern at ds:di match the text
; starting exactly at es:si? Elements are matched one at a time with omatch.
; For a CLOSURE the closed element is matched as many times as it will go,
; then the rest of the pattern is tried (recursively) at each position from
; there back down to where the closure began.
amatch:
;es:si -> source text
;ds:di -> pattern
;return cy if no match, else nc, bx->end of matching string
;si and di are returned unchanged.
push si ;preserve input pointers.
push di
amatch_1:
mov al,[di]
cmp al,EOS
mov bx,si ;prepare to exit. (mov leaves the flags from the cmp alone.)
je amatch_success
cmp al,CLOSURE
jne amatch_3
add di,CLOSIZE ;di -> the element the closure applies to.
mov bx,si ;save the first closure pattern.
;match as many as we can
amatch_4:
call omatch
jnc amatch_4
;match only as many as fit the next pattern
call patsiz
add di,ax ;di -> the pattern after the closed element.
amatch_5:
push bx
call amatch ;try to match rest of pattern.
pop ax ;conserve stack
jnc amatch_success ;go if it matched. (bx = end, from the inner call.)
mov bx,ax ;restore bx.
cmp si,bottop ;backing up past the point?
jne amatch_8 ;no - just decrement.
mov si,topbot ;yes - get the bottom of the top.
amatch_8:
dec si ;point to the previous character.
cmp si,bx ;zero or more matches still?
jae amatch_5 ;yes.
stc
jmp short amatch_exit ;can't get rest of pattern to match.
amatch_3:
call omatch
jc amatch_exit ;unsuccessful - exit.
amatch_7:
call patsiz
add di,ax ;step to the next element of the pattern.
jmp amatch_1
amatch_success:
clc
amatch_exit:
pop di ;restore input pointers. (pop leaves the carry alone.)
pop si
ret
omatch:
;omatch - match one pattern element against the text.
;es:si -> source text
;ds:di -> pattern element (not a CLOSURE or EOS)
;return cy if the element does not match (si unchanged); else nc, with si
; stepped past the character for CHR, ANY, CCL and NCCL. BOL and EOL match
; without consuming anything.
;Every successful exit goes through omatch_yes, which moves si from the
; bottom of the top to the top of the bottom.
	mov	al,[di]			;al = the element's tag.
	cmp	al,CHR
	jne	omatch_try_bol
	mov	al,es:[si]
	cmp	al,[di+1]		;the literal follows its tag.
	jne	omatch_no
	jmp	omatch_advance
omatch_try_bol:
	cmp	al,BOL
	jne	omatch_try_eol
	cmp	si,bottop		;are we at the point?
	jne	omatch_bol_here		;no - look just behind si.
	push	si			;yes - what precedes us ends the top.
	mov	si,topbot
	cmp	es:[si-2].w,LINENEW
	pop	si
	jne	omatch_no
	jmp	omatch_yes
omatch_bol_here:
	cmp	es:[si-2].w,LINENEW	;does a newline come just before?
	jne	omatch_no
	jmp	omatch_yes
omatch_try_eol:
	cmp	al,EOL
	jne	omatch_try_any
	cmp	es:[si].w,LINENEW	;are we sitting on a newline?
	jne	omatch_no
	jmp	omatch_yes
omatch_try_any:
	cmp	al,ANY
	jne	omatch_try_ccl
	cmp	es:[si].w,LINENEW	;anything will do except a newline.
	je	omatch_no
	jmp	omatch_advance
omatch_try_ccl:
	cmp	al,CCL
	jne	omatch_try_nccl
	cmp	es:[si].w,LINENEW	;a class never matches a newline.
	je	omatch_no
	call	locate
	jc	omatch_no		;not a member.
	jmp	omatch_advance
omatch_try_nccl:
	cmp	al,NCCL
	jne	omatch_no		;unknown tag - treat as no match.
	cmp	es:[si].w,LINENEW
	je	omatch_no
	call	locate
	jc	omatch_advance		;not a member - which is what we want.
omatch_no:
	stc
	ret
omatch_advance:
	inc	si			;consume the character.
omatch_yes:
	cmp	si,topbot		;at bottom of top?
	jne	omatch_yes_1
	mov	si,bottop		;yes, go to top of bottom.
omatch_yes_1:
	clc
	ret
locate:
;locate - is the text character a member of a character class?
;es:si -> the text character
;ds:di -> CCL or NCCL element: tag, count byte, then that many members.
;exit with cy=0 if found, cy=1 if not. al = the text character.
;cx, di and es are preserved.
	push	cx
	mov	ch,0
	mov	cl,[di+1]		;cx = number of members.
	mov	al,es:[si]
	push	es
	push	di
	push	ds			;scasb looks through es, and the
	pop	es			; pattern lives in ds.
	add	di,2			;di -> the members.
	repne	scasb			;leaves zr only if one of them equals al.
	pop	di			;pops leave the flags alone.
	pop	es
	pop	cx
	je	locate_hit
	stc
	ret
locate_hit:
	clc
	ret
patsiz:
;patsiz - size in bytes of one compiled pattern element.
;enter ds:di -> pat
;exit ax = size: 2 for CHR, CLOSIZE for CLOSURE, 1 for BOL, EOL and ANY,
; member count + 2 for CCL and NCCL.
;For any other tag ax is not a size: al holds the tag and ah is untouched.
	mov	al,[di]
	cmp	al,CHR
	je	patsiz_pair
	cmp	al,CLOSURE
	je	patsiz_closure
	cmp	al,BOL
	je	patsiz_single
	cmp	al,EOL
	je	patsiz_single
	cmp	al,ANY
	je	patsiz_single
	cmp	al,CCL
	je	patsiz_class
	cmp	al,NCCL
	je	patsiz_class
	ret				;error - not a tag we know.
patsiz_pair:
	mov	ax,2			;the tag and its character.
	ret
patsiz_closure:
	mov	ax,CLOSIZE
	ret
patsiz_single:
	mov	ax,1			;the tag stands alone.
	ret
patsiz_class:
	mov	al,[di+1]		;the member count...
	mov	ah,0
	add	ax,2			;...plus the tag and the count itself.
	ret
;NOTE(review): ds is named twice in the assume below; the second operand
; looks as if it was meant to be another segment register - confirm.
assume ds:data, ds:data
public eol_only
eol_only:
;return zr if the search pattern matches eol only.
; we need this routine because search and replace should advance past the
; newline if we're matching eol only.
;The word compared is the first two bytes of outpat: an EOL tag in the low
; byte followed directly by the EOS tag in the high byte.
cmp word ptr outpat,EOS*256 + EOL
ret
makepat:
;makepat - compile the null terminated source pattern in inpat into outpat.
;return cy=1 if error (bad character class, or outpat too small).
;Registers while compiling:
; si = index into inpat of the source character being looked at.
; di = index into outpat of the next free byte; dx = OUTPATSIZE, its limit.
; bx = index into outpat of the element compiled before the current one, or
;  -1 if there is none. It is what a CLOSURE gets applied to.
;The index of each element is pushed before it is compiled and popped into
; bx at makepat_2, so every path between the two must leave one word there.
;The final overflow test is "below" rather than "not equal": an index that
; has got past the limit is then still reported as an error.
	mov	si,0
	mov	di,0
	mov	dx,OUTPATSIZE
	mov	bx,-1
makepat_1:
	cmp	inpat[si],NULL
	je	makepat_0
	push	di			;where this element starts.
	mov	al,inpat[si]
	cmp	al,'\'			;are we escaping something?
	jne	makepat_a
	cmp	inpat[si+1],NULL	;is the '\' at the end?
	je	makepat_9		;yes - just use \.
	inc	si
	mov	al,inpat[si]		;get the escaped char.
	jmp	makepat_9		;go stick it in.
makepat_a:
	cmp	al,ANY
	jne	makepat_3
	call	addset
	jmp	makepat_2
;this really belongs at the end of makepat, but the short jump can't get there.
makepat_0:
	mov	al,EOS
	call	addset
	cmp	di,dx			;is there still room left over?
	jb	makepat__0_1		;yes - everything fitted.
	stc				;no - something may have been dropped.
	ret
makepat__0_1:
	clc
	ret
makepat_3:
	cmp	al,BOL
	jne	makepat_7
	cmp	si,0			;BOL is only special as the first char.
	jne	makepat_6
	call	addset
	jmp	makepat_2
makepat_6:
	call	addchar
	jmp	makepat_2
makepat_7:
	cmp	al,EOL
	jne	makepat_8
	cmp	inpat[si+1],NULL	;EOL is only special as the last char.
	jne	makepat_9
	call	addset
	jmp	makepat_2
makepat_9:
	call	addchar
	jmp	makepat_2
makepat_8:
	cmp	al,CCL
	jne	makepat_10
	call	getccl
	jnc	makepat_2
	pop	di			;drop the saved element index.
	stc
	ret
makepat_10:
	cmp	al,CLOSURE
	jne	makepat_11
	cmp	bx,0			;is bx>0?
	jnge	makepat_12		;no - not closure.
	mov	al,outpat[bx]
	cmp	al,CLOSURE		;trying to close a closure?
	je	makepat_12		;yes - not closure.
	cmp	al,BOL			;trying to close a beginning of line?
	je	makepat_12		;yes - not closure.
	call	stclos
	add	sp,2			;throw away the old previous.
	push	bx			;the closure sits where that element was.
	jmp	makepat_2
makepat_11:
	cmp	al,NCCL
	jne	makepat_13
	cmp	inpat[si+1],NULL	;not special at the end.
	je	makepat_13
	mov	al,NCCL
	call	addset
	mov	al,1			;one character follows.
	call	addbyte
	mov	al,inpat[si+1]
	call	addbyte
	inc	si			;skip the NCCL.
	jmp	makepat_2
makepat_13:
makepat_12:
	call	addchar
	jmp	makepat_2
makepat_2:
	pop	bx			;the element just compiled is now "previous".
	inc	si
	jmp	makepat_1
addchar:
;addchar - append a literal character element (a CHR tag, then the char).
;al = CHR to put. di->dest, dx->end of dest, as for addbyte.
;ax is returned as it was passed in.
	push	ax			;hold the character while the tag goes out.
	mov	al,CHR
	call	addset
	pop	ax
	jmp	addbyte			;store the character; addbyte returns for us.
addset:					;only command chars call addset.
addbyte:
;addbyte - append one byte to the compiled pattern.
;al = char to put, di->dest, dx->end of dest (both indexes into outpat).
;If di has reached dx the byte is dropped and di is left alone; makepat
; notices that at the end and reports the error.
;The test is "above or equal" rather than "equal" so that nothing is stored
; even if di has somehow got past dx.
	cmp	di,dx
	jae	addbyte_1		;no room - drop it.
	mov	outpat[di],al
	inc	di
addbyte_1:
	ret
stclos:
;stclos - turn the last element compiled into a closure: move it up by
; CLOSIZE and put a CLOSURE tag where it used to start.
;di-> last set added + 1 (the next free index in outpat)
;bx-> start of the element to close; the CLOSURE tag goes there.
;dx-> end of dest.
;exit with di advanced by CLOSIZE; al is destroyed.
;If outpat is already full nothing is moved and di is left equal to dx, so
; that makepat reports the overflow. Without this check the move would store
; at outpat[dx] and carry di past dx, and, if the element itself had been
; dropped for lack of room (bx = di), the loop below would never terminate.
	cmp	di,dx			;any room to grow?
	jae	stclos_3		;no - leave the overflow for makepat.
	push	di
stclos_1:
	dec	di			;move the element up, last byte first.
	mov	al,outpat[di]
	mov	outpat[di+CLOSIZE],al
	cmp	di,bx
	jne	stclos_1
stclos_2:
	mov	outpat[bx],CLOSURE
	pop	di
	add	di,CLOSIZE
stclos_3:
	ret
getccl:
;getccl - compile a character class. On entry inpat[si] is the CCL that
; opens it; a NCCL immediately after that makes the class negated.
;si -> source (null terminated)
;di -> dest, dx -> end of dest
;return cy=1 if error: the class is not closed by ECCL, there was no room in
; outpat for its count byte, or it has more members than a byte can count.
;on return si indexes the character that ended the class.
;The element built is: tag (CCL or NCCL), count byte, members.
	inc	si			;step over the CCL.
	mov	al,inpat[si]
	cmp	al,NCCL
	jne	getccl_1
	call	addset			;negated: the tag is the NCCL itself.
	inc	si
	jmp	getccl_2
getccl_1:
	mov	al,CCL
	call	addset
getccl_2:
	push	bx
	mov	bx,di			;bx = where the count byte belongs.
	call	addbyte			;leave room for count
	call	dodash			;store the members.
	mov	ax,di
	sub	ax,bx
	dec	ax			;ax = number of members stored.
	cmp	bx,dx			;did the count byte get a place?
	jae	getccl_4		;no - outpat was already full.
	or	ah,ah			;will the count fit in a byte?
	jnz	getccl_4		;no - patsiz and locate couldn't cope.
	mov	outpat[bx],al
	pop	bx
	cmp	inpat[si],ECCL		;was the class closed properly?
	je	getccl_3
	stc
	ret
getccl_3:
	clc
	ret
getccl_4:
	pop	bx
	stc
	ret
dodash:
;dodash - copy the members of a character class into the compiled pattern,
; expanding ranges such as a-z. Stops at the ECCL or at the null.
;si -> source pattern (null terminated)
;di -> destination pattern
;dx -> end of destination pattern
;on return si indexes the ECCL or null that stopped the copy. bx preserved.
;A "-" stands for itself when it is the first member, when it is the last
; one, when the character before it is above the one after it, or when
; either of those two is not alphanumeric. Otherwise the characters after
; the lower bound, up to and including the upper bound, are added (the lower
; bound was added when it was met).
;The "last member" test used to look at inpat[si], which is always the "-"
; itself at that point; it now looks at the character after the "-".
	push	bx
	mov	bx,si			;bx = index of the first member.
dodash_1:
	mov	al,inpat[si]
	or	al,al
	je	dodash_2		;ran out of pattern.
	cmp	al,ECCL
	je	dodash_2		;end of the class.
	cmp	al,"-"
	je	dodash_4
	call	addbyte			;an ordinary member.
	jmp	dodash_8
dodash_4:
	cmp	si,bx			;at beginning?
	je	dodash_5
	cmp	inpat[si+1],ECCL	;or at end?
	jne	dodash_6
dodash_5:
	mov	al,"-"			;if at beginning or at end, just a "-"
	call	addbyte
	jmp	dodash_8
dodash_6:
	mov	al,inpat[si-1]
	cmp	al,inpat[si+1]
	ja	dodash_5		;bounds the wrong way round.
	call	alphanumeric
	jnc	dodash_5		;lower bound isn't alphanumeric.
	mov	al,inpat[si+1]
	call	alphanumeric
	jnc	dodash_5		;upper bound isn't alphanumeric.
	mov	al,inpat[si-1]
dodash_7:
	inc	al			;pre-increment -- the first one's there.
	cmp	al,inpat[si+1]
	ja	dodash_9
	call	addbyte
	jmp	dodash_7
dodash_9:
	inc	si			;step over the upper bound as well.
dodash_8:
	inc	si
	jmp	dodash_1
dodash_2:
	pop	bx
	ret
alphanumeric:
;alphanumeric - classify the character in al.
;return cy=1 if al is alphanumeric ("0"-"9", "A"-"Z" or "a"-"z"), else cy=0.
;al is preserved.
;The three ranges are tested from the highest down: above the top of a range
; means "no", at or above its bottom means "yes".
	cmp	al,"z"
	ja	alphanumeric_no
	cmp	al,"a"
	jae	alphanumeric_yes
	cmp	al,"Z"
	ja	alphanumeric_no
	cmp	al,"A"
	jae	alphanumeric_yes
	cmp	al,"9"
	ja	alphanumeric_no
	cmp	al,"0"
	jae	alphanumeric_yes
alphanumeric_no:
	clc
	ret
alphanumeric_yes:
	stc
	ret
code ends
end